/******************************************************;
*This do file creates Appendix Table 3;

******************************************************/

* Start from a clean session: clear the data in memory, matrices, Mata objects and macros.
* The capture prefix lets the script continue when the prefixed command returns an error.
capture clear
clear matrix
clear mata
capture macro drop _all
* Request 1000 MB of memory. NOTE(review): believed to be ignored by Stata 12 and later - confirm.
set mem 1000m
* Do not pause long output with the more prompt.
set more off

* From here on every command, including star comments, must end with a semicolon.
#delimit ;

*UPDATE PATH NAMES before running (each path should end with a slash);
*Every definition ends with its own semicolon so that it does not absorb the lines that follow;
*data is the folder holding the analysis sample and the census files;
*geo_data is the folder holding the 1990 NSCB province codes and the saved crosswalk;
*data_save is an alternative name for the crosswalk folder and defaults to geo_data;
global ched_data "";
global output "";
global data "";
global geo_data "";
global data_save "${geo_data}";

******************************************************;
*Create indicator for high versus low nurse migration provinces to merge with Census;
*Every statement below ends with a semicolon as required after the delimiter change above;
use "${data}analysis_sample.dta";

*Keep the province name and its high migration flag, then attach the province codes;
keep province high1;
merge n:1 province using "${geo_data}nscb_1990codes.dta";
keep if _merge==3;
drop _merge;

*Keep one row per province code;
egen tag=tag(prv);
keep if tag==1;
drop tag;

*Store the code as a string and pad one-character codes with a leading zero;
tostring prv, replace;
gen length=strlen(prv);
replace prv="0"+prv if length==1;

*The census sections fold codes 74 75 76 into 39 and this row is given the same code;
*NOTE(review): if another row already carries code 39 the crosswalk is no longer unique on prv - confirm;
replace prv="39" if province=="THIRD DISTRICT";

*prov_school and high1_school are copies used when merging on the province of schooling;
keep province high1 prv;
gen prov_school=prv;
gen high1_school=high1;
destring prov_school, replace;

save "${geo_data}province_high_crosswalk.dta", replace;

******************************************************;
*2000 Census;
use "${data}Census2000_10percent.dta", clear;

*Remove records flagged as overseas (OFWs);
drop if overseas==1;

*Map later province codes back to the 1990 definitions used in the nursing data;
replace prv="39" if inlist(prv, "74", "75", "76");
replace prv="63" if prv=="80";
replace prv="32" if prv=="81";
replace prv="23" if prv=="82";

*Attach the high or low migration flag of the province of residence and keep matched rows only;
merge n:1 prv using "${geo_data}province_high_crosswalk.dta", keepusing(high1);
keep if _merge==3;
drop _merge;

*Non-merges are Sulu, Tawi-tawi, and Cotabato City, all of which are dropped from main analysis as discussed in the appendix;

*Work out the province in which the school is located;
tostring place_school, replace;
gen length=strlen(place_school);
*Three-character codes get a leading zero and the string form of missing becomes empty;
replace place_school="0"+place_school if length==3;
replace place_school="" if place_school==".";

*The province is the first two characters, left missing for the listed special codes;
gen prov_school=substr(place_school, 1, 2) if !inlist(place_school, "7", "8", "9", "9999", "9899");
destring prov_school, replace;

*Map later province codes back to the 1990 boundaries used in the nursing data;
replace prov_school=39 if inlist(prov_school, 74, 75, 76);
replace prov_school=63 if prov_school==80;
replace prov_school=32 if prov_school==81;
replace prov_school=23 if prov_school==82;

*Attach the high migration flag of the schooling province and keep all rows;
merge n:1 prov_school using "${geo_data}province_high_crosswalk.dta", keepusing(high1_school);
drop _merge;

*Residence code becomes numeric so it can be compared with the schooling code;
destring prv, replace;

*Flag individuals aged 18 to 22;
gen college_age_1822=1 if inrange(age, 18, 22);

*Flag those aged 18 to 22 who are attending school;
gen college_age_attend_1822=1 if college_age_1822==1&school_attend==1;

*Flag individuals whose schooling province is known and differs from the province of residence;
gen mover=1 if prv!=prov_school&prov_school!=.;

*Flags for attending school in another province, overall and by low or high status of origin and destination;
local band "1822";
local moved "mover==1&college_age_attend_`band'==1";
gen school_diff_prov_all_`band'=1 if `moved';
gen school_diff_prov_ltoh_`band'=1 if `moved'&high1==0&high1_school==1;
gen school_diff_prov_htol_`band'=1 if `moved'&high1==1&high1_school==0;
gen school_diff_prov_htoh_`band'=1 if `moved'&high1==1&high1_school==1;
gen school_diff_prov_ltol_`band'=1 if `moved'&high1==0&high1_school==0;

*Drop provinces that are not in the sample (Tawi-Tawi, Sulu, Cotabato, Isabela), by residence or by schooling;
drop if inlist(prv, 66, 70, 97, 98, 99)|inlist(prov_school, 66, 70, 97, 98, 99);

*Weighted totals of every flag;
collapse (sum) school_diff* college_age* [pw=popwgt];

*Each rate is the weighted count of movers divided by the weighted count attending school at ages 18 to 22;
foreach flow in all ltoh htol htoh ltol {;
	gen rate_`flow'_1822=school_diff_prov_`flow'_1822/college_age_attend_1822;
};

list rate*1822;

******************************************************;
*2007 Census;
*Each regional file is processed on its own, collapsed to cells of residence by schooling province, and saved;

foreach y in "1" "2" "3" "4A" "4B" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" "16" {;
	use "${data}Region`y'.dta", clear;

	*drop ofws;
	drop if ofw==1;

	*Clean geographic codes to match 1990 geographic boundaries used in nursing data;
	replace prv="39" if prv=="74";
	replace prv="39" if prv=="75";
	replace prv="39" if prv=="76";
	replace prv="63" if prv=="80";
	replace prv="32" if prv=="81";
	replace prv="23" if prv=="82";
	replace prv="73" if prv=="83";
	replace prv="67" if prv=="85";

	*Same recoding for the schooling province, which this file stores as ppos;
	rename ppos prov_school;
	replace prov_school=39 if prov_school==74;
	replace prov_school=39 if prov_school==75;
	replace prov_school=39 if prov_school==76;
	replace prov_school=63 if prov_school==80;
	replace prov_school=32 if prov_school==81;
	replace prov_school=23 if prov_school==82;
	replace prov_school=73 if prov_school==83;
	replace prov_school=67 if prov_school==85;
	
	*Pad one-character province codes with a leading zero, as in the crosswalk;
	gen length=strlen(prv);
	replace prv="0"+prv if length==1;
	drop length;

	*Create indicators for high and low migration provinces;
	*The crosswalk is read from geo_data, the folder it is saved to earlier in this file;
	merge n:1 prv using "${geo_data}province_high_crosswalk.dta", keepusing(high1);
	tab prv if _merge==1;
	drop if _merge!=3;
	drop _merge;

	*Create indicators for high migration schooling provinces;
	merge n:1 prov_school using "${geo_data}province_high_crosswalk.dta", keepusing(high1_school);
	tab prov_school if _merge==1&prov_school!=.;
	drop _merge;

	destring prv, replace;

	*Create number of college aged individuals;
	gen college_age_1822=1 if age>=18&age<=22;


	*Create indicator for in college and actually attending school;
	rename cas school_attend;
	gen college_age_attend_1822=1 if college_age_1822==1&school_attend==1;

	*Create school in different province variables (a schooling code of 0 is not counted as a move);
	gen mover=1 if prv!=prov_school&prov_school!=.&prov_school!=0;

	*Create indicators for college aged and schooling in another province;
	foreach x in "1822" {;

		gen school_diff_prov_all_`x'=1 if mover==1&college_age_attend_`x'==1;
		gen school_diff_prov_ltoh_`x'=1 if mover==1&high1==0&college_age_attend_`x'==1&high1_school==1;
		gen school_diff_prov_htol_`x'=1 if mover==1&high1==1&college_age_attend_`x'==1&high1_school==0;
		gen school_diff_prov_htoh_`x'=1 if mover==1&high1==1&college_age_attend_`x'==1&high1_school==1;
		gen school_diff_prov_ltol_`x'=1 if mover==1&high1==0&college_age_attend_`x'==1&high1_school==0;
		
	};
	
	*Drop if provinces are not in sample: Tawi-Tawi, Sulu, Cotabato, Isabela;
	drop if inlist(prv, 66, 70, 97, 98, 99);
	*Also drop if the child goes to school in these provinces;
	drop if inlist(prov_school, 66, 70, 97, 98, 99);

	*Unweighted sums within each cell of schooling province, residence province and the two flags;
	collapse (sum) school_diff* college_age*, by(prov_school prv high1 high1_school);

	save "${data}2007_cen_nursecodes_Reg`y'.dta", replace;
};

*Stack the regional cell files into one national file;
use "${data}2007_cen_nursecodes_Reg1.dta", clear;
foreach x in "2" "3" "4A" "4B" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" "16" {;
	append using  "${data}2007_cen_nursecodes_Reg`x'.dta";
};

save "${data}2007_cen_nurseschoolpairs.dta", replace;

*National totals and rates are computed on a temporary copy and the stacked data are restored afterwards;
preserve;
collapse (sum) school_diff* college_age*;

*The flags carry the suffix 1822, so the rates must use the same suffix;
foreach x in "_all" "_ltoh" "_htol" "_htoh" "_ltol" {;
	gen rate`x'_1822=school_diff_prov`x'_1822/college_age_attend_1822;

};

list rate*1822;
restore;

******************************************************;
*2010 Census;
use "${data}2010_Census_Form3.dta", clear;

*drop ofws;
drop if ofw==1;

*Clean up geographic codes to match 1990 boundaries used in nursing data;
replace prv="39" if prv=="74";
replace prv="39" if prv=="75";
replace prv="39" if prv=="76";
replace prv="63" if prv=="80";
replace prv="32" if prv=="81";
replace prv="23" if prv=="82";
replace prv="73" if prv=="83";
replace prv="67" if prv=="85";

*Same recoding for the province of schooling;
replace prov_school=39 if prov_school==74;
replace prov_school=39 if prov_school==75;
replace prov_school=39 if prov_school==76;
replace prov_school=63 if prov_school==80;
replace prov_school=32 if prov_school==81;
replace prov_school=23 if prov_school==82;
replace prov_school=73 if prov_school==83;
replace prov_school=67 if prov_school==85;


*Create indicators for high and low migration provinces;
*The crosswalk is read from geo_data, the folder it is saved to earlier in this file;
merge n:1 prv using "${geo_data}province_high_crosswalk.dta", keepusing(high1);
drop if _merge!=3;
drop _merge;


*Create indicators for high migration schooling provinces;
merge n:1 prov_school using "${geo_data}province_high_crosswalk.dta", keepusing(high1_school);

drop _merge;

destring prv, replace;

*Create number of college aged individuals (age is stored as cp5_age in this file);
gen college_age_1822=1 if cp5_age>=18&cp5_age<=22;

*Create indicator for in college and actually attending school;
gen college_age_attend_1822=1 if college_age_1822==1&school_attend==1;

*Create school in different province variables (a schooling code of 0 is not counted as a move);
gen mover=1 if prv!=prov_school&prov_school!=.&prov_school!=0;

*Create indicators for college aged and schooling in another province;
foreach x in "1822" {;

	gen school_diff_prov_all_`x'=1 if mover==1&college_age_attend_`x'==1;
	gen school_diff_prov_ltoh_`x'=1 if mover==1&high1==0&college_age_attend_`x'==1&high1_school==1;
	gen school_diff_prov_htol_`x'=1 if mover==1&high1==1&college_age_attend_`x'==1&high1_school==0;
	gen school_diff_prov_htoh_`x'=1 if mover==1&high1==1&college_age_attend_`x'==1&high1_school==1;
	gen school_diff_prov_ltol_`x'=1 if mover==1&high1==0&college_age_attend_`x'==1&high1_school==0;
	
};

*Drop if provinces are not in sample: Tawi-Tawi, Sulu, Cotabato, Isabela;
drop if inlist(prv, 66, 70, 97, 98, 99);
*Also drop if the child goes to school in these provinces;
drop if inlist(prov_school, 66, 70, 97, 98, 99);

*Keep the individual level file with the flags before collapsing;
save "${data}2010_cen_nursecodes.dta", replace;

*Weighted totals of every flag;
*NOTE(review): the 2000 section weights by popwgt - confirm that popwght is the weight name in this file;
collapse (sum) school_diff* college_age* [pw=popwght];

*Each rate is the weighted count of movers divided by the weighted count attending school at ages 18 to 22;
foreach x in "_all" "_ltoh" "_htol" "_htoh" "_ltol" {;
	gen rate`x'_1822=school_diff_prov`x'_1822/college_age_attend_1822;
};

list rate*1822;
